***********************************************************************************
**** Additional modules and packages required for executing the following code ****
***********************************************************************************

/* 
SUTEX: Stata module to LaTeX code for summary statistics tables
IVREG2: Stata module for extended instrumental variables/2SLS and GMM estimation
RANKTEST: Stata module to test the rank of a matrix using the Kleibergen-Paap rk statistic
ESCOUT: Stata module to make regression tables
INTERFLEX: Stata module to estimate multiplicative interaction models with diagnostics and visualization
REGHDFE: Linear models with many levels of fixed effects
FTOOLS: Fast Stata commands for large datasets
XTSERIAL: Testing for serial correlation in linear panel-data models
CROSSFOLD: Stata module to perform k-fold cross-validation
KRLS: A Stata package for kernel-based regularized least squares
PSACALC: Stata module to calculate treatment effects and relative degree of selection under proportional selection of observables and unobservables
*/

		global dir ="C:\Users\jgw12\Dropbox\Research\Structure\Personalism-repression\data" 
		cd "$dir"
		
		capture log close
		log using Strongman-Repression.log, replace

		set more off 
		set matsize 1000
		global seed ="984353"

		
***********************************
**** Clean and merge data sets ****
***********************************

		* Clean covariate data sets *
			qui do clean-prio
			qui do clean-nmc
			qui do clean-ji
			qui do clean-nelda  /* downloaded 3.8.2018 */
			qui do clean-urdal /* downloaded from https://www.prio.org/Publications/Publication/?x=357 on 3.16.2018 */

		* Coup data *
			set more off
			insheet using http://www.uky.edu/~clthyn2/coup_data/powell_thyne_coups_final.txt, clear  /* downloaded 3.8.2018 */
			rename ccode cowcode
			rename country pt_country
			* 1st coup in Yemen (ccode=680) 1968 is South Yemen but the second coup in August is actually in N/All-Yemen
			recode cow (679=678)
			recode cow (680=678) if year==1968 & month==8
			gen  p = "/"
			egen d = concat(day p month p year)
			gen date  = date(d, "DMY")
			gen coupA = coup==1
			gen coupS = coup==2
			drop p d
			local var = "coup coupA coupS"
			foreach v of local var {
				bysort cow year: egen max`v'=max(`v')
				replace `v' =max`v'
				drop max`v'
			}
			egen tag= tag(cow year)
			keep if tag==1
			drop tag
			tsset cow year
			sort cow year
			saveold coups-merge,replace
			
		* EPR data *
			use epr-original, clear
			recode cow (260=255) (679=678) /* Yemen and West Germany are continuous regimes across unification */
			tsset cow year
			gen e = l.ethfrac
			replace ethfrac  = e 
			drop e
			tsset cow year
			gen excluded = l.exclpop 
			gen  loggdp = ln(gdpcapl)
			gen logoil = ln(1+oilpcl)
			tsset cow year
			gen gr= d.loggdp
			tssmooth ma  grow = gr, window(2 0 0)
			replace grow =gr if grow==. & gr~=.
			sort cow year
			saveold epr-merge,replace
			
		* NAVCO data *  /* downloaded 3.8.2018 */
			use navco2-original, clear 
			sum year  /* note that this ends in 2006!!!*/
			gen violent = 1 if prim_method==0
			gen nonvio = 1 if prim_method==1
			gen regch = 1 if camp_goal==0
			gen regchnv = camp_goal==0 & nonvio==1
			gen regchv = camp_goal==0 & violent==1
			gen cowcode = lccode
			recode cow (347=345) (364=365)        /* Russia/USSR get same cowcode; Yugoslavia */
			recode cow (531=530) if year<1990     /* Eritrea coded as Ethiopia prior to 1990 */
			local var = "violent nonvio regch regchnv regchv"
			foreach v of local var {
				egen nav_`v' = max(`v'), by(cow year)
			}
			egen tag = tag(cow year)
			keep if tag
			drop tag
			keep cow year nav_* location
			sort cow year
			merge cow year using gwf-original
			tab _merge        /* note that GWF consider pre-1955 Vietnam as "not independent" */
			recode nav_* (.=0)  if year<=2006
			tsset cow year
			gen nav_protest = l.nav_viol==1 | l.nav_nonvio==1  if l.nav_nonvio~=. 
			gen nav_protest_regch = l.nav_regch==1 if l.nav_nonvio~=. 
			gen nav_protestNV = l.nav_nonvio==1   if l.nav_nonvio~=.
			gen nav_protestV = l.nav_vio==1   if l.nav_nonvio~=.
			rename _merge merge4
			drop if merge4 ==1
			drop merge4
			rename location nav_country
			sort cow year
			saveold navco-merge, replace
			
		* Fariss data *   /* downloaded 3.8.2018 */
			use fariss-original,clear
			recode cow (679=678) if year>1990
			local var="ciri disap kill polpris tort amnesty state hathaway itt genocide rummel massive_repression executions killing additive"
			foreach v of local var {
				replace `v' = "." if `v'=="NA"
				destring `v', replace
			}
			sort cow year
			saveold fariss-merge,replace

		* Personalism data *
			use gwf-original,clear
			
			* Generate binary variables *
			gen milmerit_persB = milmerit_pers
			recode milmerit_persB (2=1) (1=0)  
			tsset gwf_caseid year
			gen newparty =support==1 & l.support==0
			gen yr = year if newparty==1
			egen yrs = max(yr), by(gwf_leaderid)
			tsset gwf_caseid year
			replace newparty=1 if l.newparty==1 & l.gwf_leaderid==gwf_leaderid & year==year[_n-1]+1
			gen createparty =militparty_new==1 | (newparty==1  & partyhistory_post==1) 
			
			* Label variables *
			global pvars1 = "partyexcom_pers partyrbr officepers createparty"
			global pvars2 = "milnotrial milmerit_persB paramil_pers sectyapp_pers"
			label var officepers "Appointments to high office"
			label var createparty "Create new party"
			label var partyexcom_pers "Party exec committee"
			label var partyrbr "Rubber stamp party"
			label var milmerit_persB "Military promotions"
			label var milnotrial "Military purge"
			label var sectyapp_pers "Security apparatus"
			label var paramil_pers "Paramilitary"
			set seed 2453456
			
			* IRT model *
			irt 2pl $pvars1 $pvars2 
			estat report $pvars1 $pvars2, byparm sort(b)
			predict pers_2pl, latent se(pers_se_2pl)
			
			* IRT plots *
			irtgraph iif  (sectyapp_pers,lcolor(blue)) (milmerit_pers,lcolor(red)) (milnotrial,lcolor(green)) ///
			(paramil_pers,lcolor(cyan)),legend(col(2) pos(6)) title(Security & military items) saving(t2,replace) ///
			ylab(,glcolor(gs15)) xtitle("Personalism ({&theta})")
			irtgraph iif  (officepers,lcolor(blue)) (partyexcom_pers,lcolor(red)) (partyrbr,lcolor(green)) ///
			(createparty,lcolor(cyan)),legend(col(2) pos(6))  title(Party & personnel items) saving(t1,replace) ///
			ylab(,glcolor(gs15)) xtitle("Personalism ({&theta})")
			gr combine t1.gph t2.gph, col(2)   ysize(5.5) xsize(9)  ycommon
			erase t1.gph
			erase t2.gph
			
			* IRT-7 model *
			irt 2pl $pvars1 milmerit_persB paramil_pers sectyapp_pers
			estat report $pvars1 milmerit_persB paramil_pers sectyapp_pers, byparm sort(b)
			predict pers7_2pl, latent se(pers7_se_2pl)
			
			* Standardize, rescale *
			qui sum pers_2pl
			gen latent_personalism = (pers_2pl+abs(r(min))) / (r(max) - r(min))
			hist latent, bin(50)
			qui sum pers7_2pl
			gen latent7_personalism = (pers7_2pl+abs(r(min))) / (r(max) - r(min))
			
			* Variance decomposition *
			qui xtset gwf_leaderid year
			qui xtsum `i'
			qui scalar sdb = r(sd_b)
			qui scalar sdw = r(sd_w)
			qui scalar vart= sdb + sdw
			qui scalar varr = sdw / vart
			scalar list sdw
			scalar list varr
			sort cow year
			save temp-pers,replace
			
		* Merge data sets *
			use temp-pers,clear
			merge cow year using fariss-merge
			tab _merge
			drop if _merge==2
			tab year if _merge==1  /* note that 1946-1948 are not in the Fariss data */
			rename _merge merge1
			sort cow year
			save temp,replace
			merge cow year using navco-merge
			tab _merge
			drop if _merge==2
 			rename _merge merge2
			sort cow year
			save temp,replace
			merge cow year using epr-merge
			tab _merge
			tab gwf_country if _merge==1  /* note that Singapore and Oman are not in EPR data */
			drop if _merge==2
 			rename _merge merge3
			sort cow year
			save temp,replace
			merge cow year using coups-merge
			tab _merge
			tab year if _merge==2  /* note that coup data begin in 1950 */
			drop if _merge==2
 			rename _merge merge4
			recode coup* (.=0) if year>=1950 & year<=2010
			sort cow year
			save temp,replace
			merge cow year using nelda-merge
			tab _merge
			drop if _merge==2
 			rename _merge merge5
			rename nelda_country Nelda_country
			recode nelda* (.=0) if year>=1945 & year<=2010
			rename Nelda_country nelda_country
			sort cow year
			save temp,replace
			merge cow year using prio-mergeB
			tab _merge
			drop if _merge==2
 			rename _merge merge6
			rename prio_country Prio_country
			recode prio* (.=0) if year>=1945 & year<=2010
			rename Prio_country prio_country
			sort cow year
			save temp,replace
			merge cow year using nmc-merge
			tab _merge
			drop if _merge==2
 			rename _merge merge7
			sort cow year
			save temp,replace
			merge cow year using GWFtscs
			tab _merge
			rename _merge merge8
			sort cow year
			merge cow year using urdal-merge
			tab _merge
			rename _merge merge9
			sort cow  year
			merge cow year using latent-protest-data
			tab _merge
			rename _merge merge10
			sort cow year
			merge cow year using ji-merge
			tab _merge   
			tab gwf_country if _merge==1  /* no JI data for South Vietnam */
			rename _merge merge11
			keep if gwf_caseid~=.
			saveold temp,replace
 
		

		* Variable transformations *
		gen seniorofficer = militrank>=4 & leadermil==1
		gen juniorofficer = militrank>=1 & militrank<=3 & leadermil==1
		qui sum latentmean 
		gen repression = (latentmean - r(mean))/r(sd)  /*standardize within sample */
		replace repression = repression*-1   /* flip scale */
		qui sum lpopl
		replace lpopl = (lpopl - r(mean))/r(sd)  /*standardize within sample */
		qui sum loggdp
		replace loggdp = (loggdp - r(mean))/r(sd)  /*standardize within sample */
		qui sum G_age
		replace G_age = (G_age - r(mean))/r(sd)  /*standardize within sample */
		gen lt = ln(gwf_leader_duration)
		qui sum lt
		replace lt = (lt - r(mean))/r(sd)  /*standardize within sample */
		gen civwar = prio_lconflict_intra>0  & prio_lconflict_int_intra==2
		gen intwar = prio_lconflict_inter>0  & prio_lconflict_int_inter==2
		xtset gwf_caseid year
		gen mpartyelec = nelda_mparty==1 | l.nelda_mparty==1 | f.nelda_mparty==1
		gen coup012 =  coupA==1 | l1.coupA==1 | l2.coupA==1  | coupS==1 | l1.coupS==1 | l2.coupS==1  
		gen inheritparty = (partyhistory_priorwonsupport==1 | partyhistory_priorno | /*
				*/ partyhistory_insurgent==1 | partyhistory_priordem==1)  if gwf_case_duration==1 | year==1946
		egen inh= max(inheritparty),by(gwf_caseid)   /* ensure no within case variation */
		replace inherit = inh
		drop inh
		tab gwf_prior
		gen priordem = gwf_prior=="democracy"
		gen priornondem_mil = gwf_prior=="dictatorship_mil"
		gen priornondem_nonmil = gwf_prior=="dictatorship_nonmil"
		gen firstlow = gwf_firstldr==1 & ldr_exp_lowrank==1
		egen maxfirstlow = max(firstlow), by(gwf_caseid)
		gen divided = seizure_uprising==1 | (seizure_coup==1 & maxfirstlow==1)
		drop maxfirstlow
		rename latent_pers xpers
		gen institutions = (support*legcomp)/8  /* 0-1 scale */
		* Obtain estimating sample *
		qui reg latentmean  xpers
		keep if e(sample)==1 & year>=1950  &year<=2010
		* Save data *
		sort cow year
		saveold temp, replace

********************
**** China data ****
********************
		use temp,clear
		list year repression xpers if gwf_country=="China" & (year==1989 | year==2000 | year==1968), clean noobs
		twoway (hist xpers, bin(35) freq   title("Personalism distribution"))  ///
		(pcarrowi 240 .75 170 .87 ,color(red)saving(t1.gph,replace) legend(off) text(245 .75  "China" "1968" , place(t))) ///
		(pcarrowi 250 .12 180 0.01 ,color(red) ytitle(Frequency) xtitle(Personalism) legend(off) text(255 .12 "China" "post-Deng", place(t)))
		twoway (hist repression, bin(35) freq saving(t2.gph, replace)  ytitle(Frequency) xtitle(Repression)  title(Repression distribution)) ///
		 (pcarrowi 150 1.45 130 .75 ,color(red) legend(off) text(154 1.45 "China 2000", place(e))) ///
		 (pcarrowi 128.5 1.91 112.5 1.31 ,color(red) legend(off) text(128.5 1.91 "China 1989", place(e)))
		gr combine t1.gph t2.gph,col(2) ysize(3) title("Dictatorships, 1950-2010")
		graph export "$dir\pers-repression-distributions-with-China.pdf", as(pdf) replace	
		twoway (hist xpers, bin(35) freq   title("Personalism distribution"))  ///
		(pcarrowi 240 .75 170 .87 ,color(red)saving(t1.gph,replace) legend(off) text(245 .75  "China" "1968" , place(t))) ///
		(pcarrowi 250 .12 180 0.01 ,color(red) ytitle(Frequency) xtitle(Personalism) legend(off) text(255 .12 "China" "post-Deng", place(t)))
		twoway (hist repression, bin(35) freq saving(t2.gph, replace)  ytitle(Frequency) xtitle(Repression) ///
		title(Repression distribution))  
		twoway (hist xpers, bin(35) freq saving(t1.gph,replace) xtitle(Personalism) title("Personalism distribution"))  
		gr combine  t2.gph t1.gph,col(2) ysize(3) title("Dictatorships, 1950-2010")
		graph export "$dir\pers-repression-distributions.pdf", as(pdf) replace
		
**********************************************************************
**** Show that personalism index is higher in personalist regimes ****
**********************************************************************
use temp,clear
table gwf_regimetype, c(mean xpers median xpers)
local var = "gwf_pers gwf_party gwf_mon gwf_mil"
foreach v of local var {
	ttest xpers, by(`v')
}
* .61 (personalist), .42 (dominant party), .42 (monarchy), .26 (military junta)


 
		
******************
**** Analysis ****
******************
		use temp,clear
		keep if year>=1955
		* rescale vars *
		local var = "ythblgap  loggdp lpopl mean5"
		foreach v of local var {
			qui: sum `v'
			qui replace `v'= (`v'-r(mean))/r(sd)
		}
		global leadervar="seniorofficer juniorofficer"
		global conflictvar = "civwar intwar mean5"
		label var repress "Repression"
		label var xpers "Personalism"
		label var senior "Senior officer"
		label var junior "Junior officer"
		label var civwar "Civil conflict"
		label var intwar "Int'l conflict"
		label var mean5 "Protest"
		label var ld "Regime duration"
		label var lt "Leader time in power (log)"
		label var loggdp "GDP per capita"
		label var lpopl "Population"
		label var institution "Institutions"
		label var year "Year"
		sutex repression year xpers lt loggdp lpopl $conflictvar $leadervar institutions year, ///
			minmax labels file($dir/sumstats.tex) replace
		
		
		set seed $seed
		xtset gwf_caseid year  /* note that the cross-section unit is the regime-case, not country */
		* check within variation in estimating sample */
		local var = "repression xpers"
		foreach i of local var {
			qui xtsum `i'
			scalar sdb`i' = r(sd_b)
			scalar sdw`i' = r(sd_w)
			scalar vart`i'= sdb`i' + sdw`i'
			scalar varr`i' = sdw`i' / vart`i'
			scalar list varr`i'
		}
		
		* Missingness not correlated with repression or personalism *
		qui:reg repression   xpers lt loggdp lpopl mean5
		gen s=e(sample)
		gen miss =s==0
		tab miss  /* 1.94 percent of sample is missing data on GDP and/or population */
		xi:qui xtreg repression i.year miss, cluster(gwf_caseid) fe
		lincom miss
		xi:qui xtreg xpers i.year miss, cluster(gwf_caseid) fe
		lincom miss
		drop miss s
		
		* Fit the data *
		twoway scatter repression xpers,mcol(gs13) ||  lfit repression xpers,lcol(blue) || ///
			lpolyci repression xpers,col(red) legend(lab(2 "Linear fit") lab(3 "Nonlinear fit") ///
			order(2 3) pos(5) col(1) ring(0))
		qui:reg repression xpers  
		avplot xpers
		qui:reg repression xpers i.year
		avplot xpers		
		qui:reg repression xpers i.gwf_caseid  
		avplot xpers
		qui:reg repression xpers i.gwf_caseid i.year
		avplot xpers
		lvr2plot
 
		* Table 1 FE specifications with case FE and leader cluster *
		xi: ivreg2 repression i.gwf_caseid i.year xpers, cluster(gwf_leaderid) partial(i.gwf_caseid i.year)
		est store r1a
		xi: ivreg2 repression i.gwf_caseid i.year xpers lt loggdp lpopl, cluster(gwf_leaderid) partial(i.gwf_caseid i.year)
		est store r1b
		xi: ivreg2 repression i.gwf_caseid i.year xpers lt loggdp lpopl $conflictvar, cluster(gwf_leaderid) partial(i.gwf_caseid i.year)
		est store r1c
		xi: ivreg2 repression i.gwf_caseid i.year xpers lt loggdp lpopl $leadervar, cluster(gwf_leaderid) partial(i.gwf_caseid i.year)
		est store r1d
		xi: ivreg2 repression i.gwf_caseid i.year xpers lt loggdp lpopl $conflictvar $leadervar, cluster(gwf_leaderid) partial(i.gwf_caseid i.year)
		est store r1e
		xi: ivreg2 repression i.gwf_caseid i.year xpers lt loggdp lpopl $conflictvar $leadervar institutions, cluster(gwf_leaderid) partial(i.gwf_caseid i.year)
		est store r1f
		label var repress "Repression"
		label var xpers "{bf:Personalism}"
		label var senior "Senior officer"
		label var junior "Junior officer"
		label var civwar "Civil conflict"
		label var intwar "Int'l conflict"
		label var mean5 "Protest"
		label var ld "Regime duration"
		label var lt "Leader time in power"
		label var loggdp "GDP per capita"
		label var lpopl "Population"
		label var institution "Institutions"
		coefplot (r1a, msym(d)) (r1b, msym(t)) (r1c, msym(oh)) (r1d, msym(plus)) (r1e, msym(P)) (r1f, msym(T)), ///
		title("Correlates of domestic repression", size(medium)) drop(_cons  _I*) order(xpers) xline(0) ///
		grid(glcolor(gs15)) mfcolor(white) xlabel(-.5 (.5) 1)  levels(95 90)  ///
		legend(off) ysize(2) xsize(1.5)  xtitle("Coefficient estimate", height(6)) ///
		note("90 (thin) and 95 (thick) percent confidence intervals", size(vsmall) pos(6))
		graph export "$dir\repression-model-1.pdf", as(pdf)   replace
		estout r1a r1b r1c r1d r1e r1f using TableA1.tex, cells(b(star  fmt(%9.3f)) se(par fmt(%9.2f))) ///
		stats(r2 N N_clust) style(tex) replace label starlevels(* 0.05) title(\label{tabA1})		
					
		* show fe/re result with regime-case cluster *
		xi: qui xtreg repression i.year xpers lt loggdp lpopl $conflictvar $leadervar institutions, fe cluster(gwf_caseid)
		lincom xpers
		qui xtreg repression coldwar xpers lt loggdp lpopl $conflictvar $leadervar institutions, fe cluster(gwf_caseid)
		lincom xpers  /* year FE and coldwar give roughly same estimate on xpers */
			* Hausman test *
				qui xtreg repression coldwar xpers lt loggdp lpopl $conflictvar $leadervar institutions, fe  
				est store fixed
				qui xtreg repression coldwar xpers lt loggdp lpopl $conflictvar $leadervar institutions, re  
				est store random
				hausman fixed ., sigmamore constant  /* rejects RE model in favor of FE model; personalism estimate statistically the same in both */
		qui xtreg repression coldwar xpers lt loggdp lpopl $conflictvar $leadervar institutions, re cluster(gwf_caseid) theta
		lincom xpers
		scalar er       =e(sigma_e)
		scalar ur_re    =e(sigma_u)
		scalar theta_med=e(thta_50)
		egen count =count(year) if e(sample),by(gwf_caseid)
		egen tag=tag(gwf_caseid)
		hist count if tag==1,bin(20)
		gen theta_re=1-sqrt((er^2)/(count*ur_re^2+er^2))
		hist theta if tag==1,bin(50) xlab(.4(.1).9) freq xtitle(Theta)  /* thetas are large; unit means weighted close to one for many panels */
		graph export "$dir\thetas.pdf", as(pdf)   replace
		drop theta count tag
		
		* RE specifications *
		global leadervar="seniorofficer juniorofficer"
		global conflictvar = "civwar intwar mean5"
		xtreg repression coldwar xpers, cluster(gwf_caseid)
		est store r2a
		xtreg repression coldwar xpers lt loggdp lpopl, cluster(gwf_caseid)
		est store r2b
		xtreg repression coldwar xpers lt loggdp lpopl $conflictvar, cluster(gwf_caseid)
		est store r2c
		xtreg repression coldwar xpers lt loggdp lpopl $leadervar, cluster(gwf_caseid)
		est store r2d
		xtreg repression coldwar xpers lt loggdp lpopl $conflictvar $leadervar, cluster(gwf_caseid)
		est store r2e
		xtreg repression coldwar xpers lt loggdp lpopl $conflictvar $leadervar institutions, cluster(gwf_caseid)
		est store r2f
		label var repress "Repression"
		label var xpers "{bf:Personalism}"
		label var senior "Senior officer"
		label var junior "Junior officer"
		label var civwar "Civil conflict"
		label var intwar "Int'l conflict"
		label var mean5 "Protest"
		label var ld "Regime duration"
		label var lt "Leader time in power"
		label var loggdp "GDP per capita"
		label var lpopl "Population"
		label var institution "Institutions"
		label var coldwar `""Cold war"  "(1955-1989)""'
		coefplot (r2a, msym(d)) (r2b, msym(t)) (r2c, msym(oh)) (r2d, msym(plus)) (r2e, msym(P)) (r2f, msym(T)), ///
		title("Regime-case RE models", size(medium)) drop(_cons  _I*) order(xpers) xline(0) ///
		grid(glcolor(gs15)) mfcolor(white) xlabel(-.5 (.5) 1)  levels(95 90)  ///
		legend(off) ysize(2) xsize(1.5) saving(r1, replace)	xtitle("Coefficient estimate", height(6)) ///
		note("90 (thin) and 95 (thick) percent confidence intervals", size(vsmall) pos(6))
		graph export "$dir\repression-model-2.pdf", as(pdf)   replace
		
		* check with HAC errors *
		xi: ivreg2 repression i.gwf_caseid i.year xpers lt loggdp lpopl $conflictvar $leadervar institution, ///
			rob bw(7) partial(i.gwf_caseid i.year)	
		* add leader age to specification *
		xi: ivreg2 repression i.gwf_caseid i.year xpers lt loggdp lpopl $conflictvar $leadervar institution G_age, ///
			cluster(gwf_leaderid) partial(i.gwf_caseid i.year)
			est store t1
		* add NAVCO protests to specification, only up to 2006 *
		xi: ivreg2 repression i.gwf_caseid i.year xpers lt loggdp lpopl $conflictvar $leadervar institution nav_protestV nav_protestNV, ///
			cluster(gwf_leaderid) partial(i.gwf_caseid i.year)
			est store t2
		* add judicial independence to specification *
		xi: ivreg2 repression i.gwf_caseid i.year xpers lt loggdp lpopl $conflictvar $leadervar institution  lji, ///
			cluster(gwf_leaderid) partial(i.gwf_caseid i.year)
			est store t3
		* add elections to specification *
		xi: ivreg2 repression i.gwf_caseid i.year xpers lt loggdp lpopl $conflictvar $leadervar institution mpartyelec, ///
			cluster(gwf_leaderid) partial(i.gwf_caseid i.year)
			est store t4
		* add coups to specification *
		xi: ivreg2 repression i.gwf_caseid i.year xpers lt loggdp lpopl $conflictvar $leadervar institution coupA coupS, ///
			cluster(gwf_leaderid) partial(i.gwf_caseid i.year)
		xi: ivreg2 repression i.gwf_caseid i.year xpers lt loggdp lpopl $conflictvar $leadervar institution coup012, ///
			cluster(gwf_leaderid) partial(i.gwf_caseid i.year)
			est store t5
		* add youth buldge to specification, only up to 2000 *
		xi: ivreg2 repression i.gwf_caseid i.year xpers lt loggdp lpopl $conflictvar $leadervar institution ythblgap, ///
			cluster(gwf_leaderid) partial(i.gwf_caseid i.year)
			est store t6
		xtset gwf_caseid year
		xtreg repression coldwar xpers lt loggdp lpopl $conflictvar $leadervar institution ythblgap, cluster(gwf_caseid)  		
		label var repress "Repression"
		label var xpers "{bf:Personalism}"
		label var senior "Senior officer"
		label var junior "Junior officer"
		label var civwar "Civil conflict"
		label var intwar "Int'l conflict"
		label var mean5 "Protest"
		label var lt `""Leader time" "in power""'
		label var loggdp "GDP per capita"
		label var lpopl "Population"
		label var inst "Institutions"
		label var G_age "Leader age"
		label var nav_protestV `""Violent" "protest camp.""'
		label var nav_protestNV `""Non-violent" "protest camp.""'
		label var lji `""Judicial" "indep.""'
		label var mpartyelec `""Multiparty" "election""'
		label var coup012 "Reccent coup"
		label var ythblgap "Youth bulge"
		coefplot (t1, msym(d)) (t2, msym(t)) (t3, msym(oh)) (t4, msym(plus)) (t5, msym(P)) (t6, msym(T)), ///
		title("Additional covariates", size(medsmall)) drop(_cons  _I* D*) order(xpers) xline(0) ///
		grid(glcolor(gs15)) mfcolor(white) xlabel(-1.5(.5)1) levels(95 90)  ///
		legend(off) ysize(10)  xtitle("Two-way fixed effects estimate", height(6)) ///
		note("90 (thin) and 95 (thick) percent confidence intervals", size(vsmall) pos(6))
		graph export "$dir\repression-additional-covariates.pdf", as(pdf)   replace
		
		
		* change the cross-section unit to country/leader *
		xtset cow year
		qui:xtreg repression i.year xpers lt loggdp lpopl $conflictvar $leadervar institutions, re cluster(cow) 
		est store cs1
		qui:xtreg repression i.year xpers lt loggdp lpopl $conflictvar $leadervar institutions, fe cluster(cow) 
		est store cs2
		xtset gwf_caseid year
		qui:xtreg repression i.year xpers lt loggdp lpopl $conflictvar $leadervar institutions, re cluster(gwf_caseid)	
		est store cs3
		qui:xtreg repression i.year xpers lt loggdp lpopl $conflictvar $leadervar institutions, fe cluster(gwf_caseid)	
		est store cs4
		xtset gwf_leaderid year
		qui:xtreg repression i.year xpers lt loggdp lpopl $conflictvar $leadervar institutions, re cluster(gwf_leaderid)	
		est store cs5
		qui:xtreg repression i.year xpers lt loggdp lpopl $conflictvar  institutions, fe cluster(gwf_leaderid)	
		est store cs6	
		label var repress "Repression"
		label var xpers "{bf:Personalism}"
		label var senior "Senior officer"
		label var junior "Junior officer"
		label var civwar "Civil conflict"
		label var intwar "Int'l conflict"
		label var mean5 "Protest"
		label var lt `""Leader time" "in power""'
		label var loggdp "GDP per capita"
		label var lpopl "Population"
		label var inst "Institutions"
		coefplot (cs1, msym(d)) (cs2, msym(t)) (cs3, msym(oh)) (cs4, msym(plus)) (cs5, msym(P)) (cs6, msym(T)), ///
		title("Changing the cross-section unit", size(medsmall)) keep(xpers lt loggdp lpopl $conflictvar $leadervar institutions) order(xpers) xline(0) ///
		grid(glcolor(gs15)) mfcolor(white) xlabel(-1.5(.5)1) levels(95 90)  ///
		legend(lab(3 "Country, RE") lab(6 "Country, FE") lab(9 "Case, RE") lab(12 "Case, FE") lab(15 "Leader, RE") lab(18 "Leader, FE") pos(6)col(3)ring(1)) ///
		ysize(8)  xtitle("Two-way fixed effects estimate", height(6)) ///
		note("90 (thin) and 95 (thick) percent confidence intervals", size(vsmall) pos(6))
		graph export "$dir\repression-change-cs-unit.pdf", as(pdf)   replace
		xtset gwf_caseid year
		
		* check that marginal effect is relatively constant across time *
		drop if year>2010
		interflex repression xpers year lt loggdp lpopl $conflictvar $leadervar institutions , ///
			cluster(gwf_caseid) fe(gwf_caseid year) nbins(4) xr(1960 2000)  title(Calendar time in four equal bins) ///
			xlab(Year) ylab(Repression) dlab(Personalism) seed($seed) saving(time1)
	 

		
*****************
* Lag DV models *
*****************
		use temp,clear
		set seed $seed
		xtset gwf_caseid year
		gen lagrepress = l.repress
		* check for autocorrelation in Lag DV *
		xi: xtserial repress i.year lagrepress lt loggdp lpopl $conflictvar $leadervar institutions xpers 
		xi: qui reg repress i.year l.repress lt loggdp lpopl $conflictvar $leadervar institutions xpers,cluster(gwf_caseid)
		lincom xpers
 		xi: qui xtgls repress i.year l.repress lt loggdp lpopl $conflictvar $leadervar institutions xpers,corr(ar1) force
		lincom xpers
		xi: qui xtgls repress i.year l.repress lt loggdp lpopl $conflictvar $leadervar institutions xpers,corr(psar1) force
		lincom xpers
		xi: qui xtpcse repress i.year l.repress lt loggdp lpopl $conflictvar $leadervar institutions xpers,corr(ar1) het
		lincom xpers
		xi: qui xtpcse repress i.year l.repress lt loggdp lpopl $conflictvar $leadervar institutions xpers,corr(psar1) het
		lincom xpers
		
		* Full ECM, calculating the LRM *
		xi:ivreg2 repress (d.repress=l.repress) i.year  d.xpers xpers,rob bw(3) 
		est store ecm1a
		xi:ivreg2 repress (d.repress=l.repress) i.year d.lt lt d.loggdp loggdp d.lpopl lpopl d.xpers xpers,rob bw(3) 
		est store ecm1b
		xi:ivreg2 repress (d.repress=l.repress) i.year d.lt lt d.loggdp loggdp d.lpopl lpopl d.civwar civwar ///
			d.intwar intwar d.mean5 mean5 d.xpers xpers,rob bw(3) 
		est store ecm1c
		xi:ivreg2 repress (d.repress=l.repress) i.year d.lt lt d.loggdp loggdp d.lpopl lpopl ///
			d.senior senior d.junior junior d.xpers xpers,rob bw(3) 
		est store ecm1d
		xi:ivreg2 repress (d.repress=l.repress) i.year d.lt lt d.loggdp loggdp d.lpopl lpopl d.civwar civwar ///
			d.intwar intwar d.mean5 mean5 d.senior senior d.junior junior d.xpers xpers,rob bw(3)
		est store ecm1e
		xi:ivreg2 repress (d.repress=l.repress) i.year d.lt lt d.loggdp loggdp d.lpopl lpopl d.civwar civwar ///
			d.intwar intwar d.mean5 mean5 d.senior senior d.junior junior d.inst inst d.xpers xpers,rob bw(3)
		est store ecm1f
		label var repress "Repression"
		label var xpers "{bf:Personalism}"
		label var senior "Senior officer"
		label var junior "Junior officer"
		label var civwar "Civil conflict"
		label var intwar "Int'l conflict"
		label var mean5 "Protest"
		label var ld "Regime duration"
		label var lt `""Leader time" "in power""'
		label var loggdp "GDP per capita"
		label var lpopl "Population"
		label var inst "Institutions"
		coefplot (ecm1a, msym(d)) (ecm1b, msym(t)) (ecm1c, msym(oh)) (ecm1d, msym(plus)) (ecm1e, msym(P)) (ecm1f, msym(T)), ///
		title("Error-correction model", size(medsmall)) drop(_cons  _I* D*) order(xpers) xline(0) ///
		grid(glcolor(gs15)) mfcolor(white) xlabel(-2 (1) 4)   levels(95 90)  ///
		legend(off) ysize(2.) xsize(1.5)  	xtitle("Long-run multiplier estimate", height(6)) ///
		note("90 (thin) and 95 (thick) percent confidence intervals", size(vsmall) pos(6))
		graph export "$dir\repression-ecm.pdf", as(pdf)   replace


***************************		
* K-fold cross validation *
***************************
* Show that RE model with coldwar indicator and NOT twoway-FE is better model fit *
		use temp,clear
		set seed $seed
			
			capture program drop crossvalrmse
			program define crossvalrmse
				mat A=r(est)
				mat U = J(rowsof(A),1,1)
				mat c = U'*A
				mat cm = c/rowsof(A)
				mat list cm
			end
						
			capture program drop crosscalc
			program define crosscalc
				qui: crossfold xtreg repression  i.year  loggdp lpopl, fe k(10)
				crossvalrmse
				mat cm2W=cm
				qui: crossfold xtreg repression  coldwar  loggdp lpopl, fe k(10)
				crossvalrmse
				mat cmFE=cm
				qui: crossfold xtreg repression  coldwar  loggdp lpopl, re k(10)
				crossvalrmse
				mat cmRE=cm
			end
	
			simulate rmse2W =cm2W[1,1] rmseFE =cmFE[1,1] rmseRE =cmRE[1,1], rep(1000):crosscalc
			gen med=.
			gen lo=.
			gen hi=.
			gen n=_n
			centile rmse2W, centile(2.5 50 97.5)
			replace lo = r(c_1) if n==1
			replace med = r(c_2) if n==1
			replace hi = r(c_3) if n==1
			centile rmseFE, centile(2.5 50 97.5)
			replace lo = r(c_1) if n==2
			replace med = r(c_2) if n==2
			replace hi = r(c_3) if n==2
			centile rmseRE, centile(2.5 50 97.5)
			replace lo = r(c_1) if n==3
			replace med = r(c_2) if n==3
			replace hi = r(c_3) if n==3
			replace hi = hi+.00075
			replace lo = lo-.00075
			twoway (rspike lo hi n if n<=3,title(Baseline specification))  (scatter med n if n<=3,msym(plus)xscale(range(0.8 3.2)) ///
			xlab(1 "2-way FE" 2 "FE + Cold war" 3 "RE + Cold war") legend(off) ytitle(RMSE) ///
			xtitle(Estimator,size(med) height(6)) ylab(.84(.04).96))
			graph export "$dir\repression-estimator-rmse.pdf", as(pdf)   replace
	
		* RE and FE yield similar estimates for personalism; OLS may be biased upwards: use RE for cross-validation *
		use temp,clear
		set seed $seed
		qui:xtreg repression  coldwar xpers lt loggdp lpopl $conflictvar $leadervar inst,cluster(gwf_caseid) fe
		lincom xpers 
		qui xtreg repression  coldwar xpers lt loggdp lpopl $conflictvar $leadervar inst,cluster(gwf_caseid) re
		lincom xpers
		qui reg repression  coldwar xpers lt loggdp lpopl $conflictvar $leadervar inst,cluster(gwf_caseid) 
		lincom xpers
 
* set globals *
	set matsize 1000
	global y = "repression"   /* outcome variable */
	global id = "gwf_caseid"  /*cross-section unit */
	global x = "coldwar loggdp lpopl"  /* baseline covariates */
	global m = 1000 /* number of loops for the 10-fold cross-validation */
	global v = 28 /* number of variables to test in the cross-validation tests */
	global k =10 /* # folds for k-fold */
	global varlist ="ythblgap polity nav_protestNV lt inherit junior divided logoil priordem support mparty grow gwf_military gwf_person legcomp gwf_party coup012 gwf_monarc excluded senior G_age anocl xpers  lji intwar mean5 civwar nav_protestV"

* program to calculate the mean RMSE from one k-fold cross-validation with test variable *
	capture program drop crossval
	program define crossval
		qui:crossfold xtreg $y $x $test, re k($k)
		mat A=r(est)
		mat U = J(rowsof(A),1,1)
		mat c = U'*A
		mat cm = c/rowsof(A)
	end

* cross-validate v varibles m times/loops to generate uncertainty estimates *
	use temp,clear
	set seed $seed
	matrix sims = J($m,$v,.) 
	matrix colnames sims = $varlist
	forval m =1(1)$m {
		local i =1
		local var ="$varlist"
		foreach v of local var {
			global test = "`v'"
			qui:crossfold xtreg $y $x if $test~=., re k($k)  /* get baseline RMSE without test variable in specification */
			mat A=r(est)
			mat U = J(rowsof(A),1,1)
			mat c = U'*A
			mat xm = c/rowsof(A)   /* baseline RSME foreach of m simulations */
			global rmse = xm[1,1]
			qui crossval 
			qui mat sims[`m',`i'] = -1*($rmse - cm[1,1])  
			local i = `i'+1
		}
		di `m'
	}

* convert the sims stored in a matrix to sims stores as variable values for plotting *
	local var ="$varlist"
	foreach v of local var {
		gen sims_`v'=.
	} 
	gen  n=_n
	local i =1
	local var ="$varlist"
	foreach v of local var {
		forval m =1(1)$m {
			local sim=sims[`m',`i']  
			qui replace sims_`v'=`sim' if n==`m'
		} 
		local i = `i'+1
	}
	
* calculate sims medians and uncertainty from sims stored as variable values *
	gen test =""
	gen med=.
	gen hi=.
	gen lo=.
	local var ="$varlist"
	local i =1
	foreach v of local var {
		qui replace test = "`v'" if n==`i'
		qui centile sims_`v', centile(2.5 50 97.5)
		qui replace lo = r(c_1) if n==`i'
		qui replace med = r(c_2) if n==`i'
		qui replace hi = r(c_3) if n==`i'
		local i = `i'+1
	} 

* Look at point estimates for change in RMSE, by test variable *
	list n test med if n<=$v, clean noobs

* Plot change in RMSE *
twoway (rspike lo hi n if n<=$v,hori col(red)lwidth(thin)) (scatter n med if n<=$v,col(blue) msymbol(plus) msize(tiny) ///
		xtitle(Change in RMSE,height(6)) ytitle("") legend(off) xline(0) title(10-fold cross-validation for test variables) ///
		ylab(1 "Youth bulge"  2 "Polity score"  3 "Non-violent protest" 4 "Leader time"    ///
		5 "Inherited party" 6  "Junior officer" 7 "Divided seizure group" 8 "Oil rents" 9 "Prior democracy" 10 "Support party" ///
		11 "Election" 12 "Economic growth" 13 "Military regime" 14 "Personalist regime" 15 "Legislative competition"   ///
		16 "Party regime" 17 "Coup" 18 "Monarchy" 19 "Ethnic exclusion" 20 "Senior military officer" ///
		21 "Leader age" 22 "Anocracy" 23 "{bf:Personalism}" 24 "Judicial independence" 25 "Int'l conflict" ///
		26 "Protest" 27 "Civil conflict"  28 "Violent protest campaign") xlab(-.06(.02).02)) ///
		(scatter n med if n==23,col(black)msize(medium)msymbol(plus)) 
	graph export "$dir\repression-cross-validation.pdf", as(pdf)   replace
	
	* RE model with best fit *
		xtreg repression coldwar civwar intwar nav_protestV mean5 loggdp lpopl lji xpers,re cluster(gwf_caseid) theta
		est store r2
		xtreg repression coldwar civwar intwar nav_protestV mean5 loggdp lpopl lji xpers anocl,re cluster(gwf_caseid) theta
		est store r3	
		label var repress "Repression"
		label var xpers "{bf:Personalism}"
		label var civwar "Civil conflict"
		label var intwar "Int'l conflict"
		label var anocl "Anocracy"
		label var nav_protestV "Violent protest"
		label var loggdp "GDP per capita"
		label var lpopl "Population"
		label var coldwar "Cold war"
		label var mean5 "Protest"
		label var lji `""Judicial" "independence""'
		coefplot (r2, msymbol(d)) (r3, msymbol(t)), title("Best fit models" "RE with Cold War indicator", size(medium)) ///
				drop(_cons  _I*) order(xpers) xline(0) grid(glcolor(gs15)) mfcolor(white) xlabel(-1.5 (.5) .5) ///
				levels(95 90) xtitle("Coefficient estimate", height(6)) xsize(1.5) ysize(2) ///
				legend(lab(3 "Drop Anocracy") lab(6 "Include Anocracy") pos(6) ring(1) col(2))  ///
				note("90 (thin) and 95 (thick) percent confidence intervals", size(vsmall) pos(6))	
		graph export "$dir\repression-model-best-fit.pdf", as(pdf)   replace
		
		qui xtreg repression coldwar civwar intwar loggdp lpopl nav_protestNV lji mean5 xpers
		lincom xpers
		qui reg repression coldwar civwar intwar loggdp lpopl nav_protestNV lji mean5 xpers
		lincom xpers	
		krls repression coldwar civwar intwar loggdp lpopl nav_protestNV lji mean5 xpers, deriv(d)		
		twoway lpolyci d_xpers year,xlab(1960(10)2000)ylab(-.2(.2).6) xtit(Year,height(6)) saving(h1.gph,replace) ///
		ytitle(Marginal effect of personalization on repression) legend(off) yline(0) bw(1)
		gen ltd = ln(gwf_leader_duration)
		twoway lpolyci d_xpers ltd,  ylab(0(.2).6) xtitle(Leader time in power,height(6))saving(h2.gph,replace) ///
		ytitle(Marginal effect of personalization on repression) legend(off) yline(0) bw(.5)
		gr combine h1.gph h2.gph ,ysize(1) xsize(2.25)
		graph export "$dir\repression-kernel-best-fit.pdf",as(pdf) replace
		erase h1.gph 
		erase h2.gph
		
		
*********************************************************************
**** Models where personalism measure doesn't contain purge item ****
*********************************************************************
		use temp,clear
		keep if year>=1955
		sum latent7 xpers
		spearman latent7 xpers
		xi:qui ivreg2 repression i.gwf_caseid i.year latent7 lt loggdp lpopl $conflictvar $leadervar institutions, cluster(gwf_leaderid) partial(i.gwf_caseid i.year)
		lincom latent7
		est store nopurge1
		xtreg repression coldwar civwar intwar nav_protestV mean5 loggdp lpopl lji latent7,re cluster(gwf_caseid) theta
		lincom latent7
		est store nopurge2
		label var repress "Repression"
		label var latent7 "{bf:Personalism-7}"
		label var anocl "Anocracy"
		label var nav_protestV "Violent protest"
		label var coldwar "Cold war"
		label var mean5 "Protest"
		label var lji `""Judicial" "independence""'
		label var senior "Senior officer"
		label var junior "Junior officer"
		label var civwar "Civil conflict"
		label var intwar "Int'l conflict"
		label var mean5 "Protest"
		label var ld "Regime duration"
		label var lt "Leader time in power"
		label var loggdp "GDP per capita"
		label var lpopl "Population"
		label var institution "Institutions"
		coefplot (nopurge1, msymbol(d)) (nopurge2, msymbol(t)), title(Seven item IRT measure of Personalism, size(medium)) ///
				drop(_cons  _I*) order(latent7) xline(0) grid(glcolor(gs15)) mfcolor(white) xlabel(-1.5 (.5) .5) ///
				levels(95 90) xtitle("Coefficient estimate", height(6)) xsize(1.5) ysize(2) ///
				legend(lab(3 "Two-way FE") lab(6 "Best fit RE model") pos(6) ring(1) col(2))  ///
				note("90 (thin) and 95 (thick) percent confidence intervals", size(vsmall) pos(6))
		graph export "$dir\repression-latent7.pdf", as(pdf)   replace
		
***************************************************
**** Models wwith subcomponents of personalism ****
***************************************************
		use temp,clear
		alpha $pvars1 $pvars2, std item gen(xpers_alpha)
		alpha $pvars1, std item gen(pers1)
		alpha $pvars2, std item gen(pers2)
		qui sum xpers_alpha
		replace xpers_alpha = (xpers_alpha+abs(r(min))) / (r(max) - r(min))
		qui sum pers1
		replace pers1 = (pers1+abs(r(min))) / (r(max) - r(min))
		qui sum pers2
		replace pers2 = (pers2+abs(r(min))) / (r(max) - r(min))
		spearman xpers xpers_alpha pers1 pers2
		xi:qui ivreg2 repression i.gwf_caseid i.year xpers_alpha lt loggdp lpopl $conflictvar $leadervar institutions, cluster(gwf_leaderid) partial(i.gwf_caseid i.year)
		est store pp1
		xi:qui ivreg2 repression i.gwf_caseid i.year pers1 lt loggdp lpopl $conflictvar $leadervar institutions, cluster(gwf_leaderid) partial(i.gwf_caseid i.year)
		lincom pers1
		est store pp2
		xi:qui ivreg2 repression i.gwf_caseid i.year pers2 lt loggdp lpopl $conflictvar $leadervar institutions, cluster(gwf_leaderid) partial(i.gwf_caseid i.year)
		lincom pers2
		est store pp3
		xi:qui ivreg2 repression i.gwf_caseid i.year pers1 pers2 lt loggdp lpopl $conflictvar $leadervar institutions, cluster(gwf_leaderid) partial(i.gwf_caseid i.year)
		lincom pers1
		lincom pers2
		est store pp4
		label var repress "Repression"
		label var xpers_alpha "{bf:Personalism}"
		label var pers1 "{bf:Party personalism}"
		label var pers2 "{bf:Security personalism}"
		label var nav_protestV "Violent protest"
		label var coldwar "Cold war"
		label var mean5 "Protest"
		label var senior "Senior officer"
		label var junior "Junior officer"
		label var civwar "Civil conflict"
		label var intwar "Int'l conflict"
		label var mean5 "Protest"
		label var ld "Regime duration"
		label var lt "Leader time in power"
		label var loggdp "GDP per capita"
		label var lpopl "Population"
		label var institution "Institutions"
		coefplot (pp1, msymbol(d)) (pp2, msymbol(t)) (pp3, msymbol(s)) (pp4, msymbol(c)), ///
				title(Sub-components of personalism index, size(medium)) ///
				drop(_cons  _I*) order(latent7) xline(0) grid(glcolor(gs15)) mfcolor(white) xlabel(-1 (.5) 1) ///
				levels(95 90) xtitle("Coefficient estimate", height(6)) xsize(1.5) ysize(2) ///
				legend(lab(3 "Full index") lab(6 "Party personalism only") lab(9 "Security personalism only") ///
				lab(12 "Both sub-components") pos(6) ring(1) col(2) size(vsmall))  ///
				note("90 (thin) and 95 (thick) percent confidence intervals", size(vsmall) pos(6))
		graph export "$dir\repression-pers-subcomponents.pdf", as(pdf)   replace
			
				
 *********************************************************
 **** Oster (2017) unobservable selection bounds test ****
 *********************************************************
	use temp,clear
	xtset gwf_caseid year
	 * Bias reduction model: 2-way FE *
	qui xtreg  repression xpers i.year loggdp lpopl lt $conflictvar $leadervar,fe
	 local rmax = e(r2)*1.3
	 psacalc delta xpers,rmax(`rmax')  
	 psacalc beta xpers,rmax(`rmax') delta(1)
	 * Best fit specification: FE *
	qui xtreg repression xpers coldwar loggdp lpopl $conflictvar lji nav_protestV,fe
	 local rmax = e(r2)*1.3
	 psacalc delta xpers,rmax(`rmax')  
	 psacalc beta xpers,rmax(`rmax') delta(1)
	 * Best fit specification: OLS *
	qui reg repression xpers coldwar loggdp lpopl $conflictvar lji nav_protestV,
	 local rmax = e(r2)*1.3
	 psacalc delta xpers,rmax(`rmax')  
	 psacalc beta xpers,rmax(`rmax') delta(1)
	 
	gen n =_n
	gen beta=.
	gen rmax =.
	gen pie =.
	forval i = 1(1)50 {
		qui xtreg repression xpers i.year loggdp lpopl lt $conflictvar $leadervar,fe
		local rmax=e(r2)*(1+(`i'/25))
		qui replace rmax=`rmax' if n==`i'
		qui replace pie = 1+(`i'/25) if n==`i'
		qui psacalc beta xpers,  delta(1) rmax(`rmax')
		qui replace beta = r(beta) if n==`i'
	}
	twoway line beta pie if n<44,xtitle({&pi},size(large) height(6)) ///
	ytit("{&beta}{sub:Personalism}",size(large) height(6)) ylin(0,lcol(red)) xlab(1(.5)2.5) ///
	xline(1.3,lcol(blue))  title("Given {&delta}=1")  ///
	text(.27 1.175 "Oster (2017)" ,size(small)) ///
	text(.25 1.175 "rule of thumb",size(small)) ///
	text(.23 1.175 "{&pi}=1.3",size(small))	 
	graph export "$dir\sensitivity-analysis.pdf", as(pdf)   replace

 
		
******************************************************************
****** Address uncertainty in DV repression latent estimate ******
******************************************************************

 ************************************************************
 ** First do this for the structural model with two-way FE **
 ************************************************************
	use temp,clear
	set seed $seed
	xtset gwf_caseid year

* Generate DV and IV with uncertainty in original dataset
	gen latentmeansim = rnormal(latentmean,latentsd)
	egen repressionsim = std(latentmeansim)
	replace repressionsim = repressionsim*(-1)
	gen latentperssim = rnormal(pers_2pl,pers_se_2pl)
	qui sum latentperssim
	gen xperssim = (latentperssim+abs(r(min))) / (r(max) - r(min))

* Define a program for repreatedly generating DV and IV with uncertainty, and run regression model *
	capture program drop latentsim
	program define latentsim
		drop latentmeansim repressionsim latentperssim xperssim
		gen latentmeansim = rnormal(latentmean,latentsd)
		egen repressionsim = std(latentmeansim)
		replace repressionsim = repressionsim*(-1)
		gen latentperssim = rnormal(pers_2pl,pers_se_2pl)
		qui sum latentperssim
		gen xperssim = (latentperssim+abs(r(min))) / (r(max) - r(min))
		xi: ivreg2 repressionsim i.year i.gwf_caseid xperssim lt loggdp lpopl $conflictvar $leadervar institution, cluster(gwf_leaderid)  partial(i.gwf_caseid i.year)
	end

* Simulate 1000 times and store coefficients *
	simulate _b _se, rep($m):latentsim

* Save result to a dta file *
	save simbeta.dta,replace

* Calculate sims means and (between & within) standard errors from sims stored as variable values *
	gen varname =""
	gen meanbeta=.
	gen varbeta=.
	local var ="_b_seniorofficer _b_juniorofficer _b_institutions _b_lpopl _b_loggdp _b_mean5 _b_intwar _b_civwar _b_xperssim"
	local i =1
	gen n =_n
	foreach v of local var {
		replace varname = "`v'" if n==`i'
		qui sum `v'
		replace meanbeta = r(mean) if n==`i'
		replace varbeta = r(Var) if n==`i'
		local i = `i'+1
	}
	gen meanse=.
	local var ="_se_seniorofficer _se_juniorofficer _se_institutions _se_lpopl _se_loggdp _se_mean5 _se_intwar _se_civwar _se_xperssim"
	local i =1
	foreach v of local var {
		qui sum `v'
		replace meanse = r(mean) if n==`i'
		local i = `i'+1
	}
	
* Calculate overall standard errors *
	qui count
	gen simse = sqrt(meanse^2+varbeta*(1+1/r(N)))


	/* 
	Rubin (1987)
	Estimate of beta is the mean of the betas from each estimated beta 
	Estimate of variance is: Vb + Vw + Vb/m
		where Vb is the between variance, Vw is the mean variance, and Vb/m is the sampling variance:
			Vb is the sum of the squared deviations from the mean of the estimated betas
			Vw is the mean of the sampling variances (SE) from each of the m simluated variances
	*/ 

* Generate 95% CI *
	gen lo = meanbeta - 1.96*simse
	gen hi = meanbeta + 1.96*simse

* Plot coefficients with uncertainty *
	twoway (rspike lo hi n if n<=9,hori col(black)lwidth(medium)) (scatter n meanbeta if n<=9,col(black) msymbol(d) msize(medium) ///
		xtitle("Coefficient estimate", height(6)) ytitle("") legend(off) ysize(1) xsize(1.5) xline(0,lpat(dash)) mfcolor(white) ///
		title("Correlates of domestic repression", size(medium) ) ///
		ylab(1 `" "Senior" "officer""' 2 `" "Junior" "officer""' 3 "Institutions" 4 "Population"  5 "GDP per capita" 6 "Protest"  ///
		7 "Int'l conflict" 8 "Civil conflict" 9 "{bf:Personalism}") ylabel(, angle(0)))  ///
		(scatter n meanbeta if n<=9,col(red)msize(vsmall)msymbol(plus))
		graph export "repression-model-3.pdf", as(pdf)   replace
		
		
		
 ***************************************************
 ** Second do this for the best-fit model with RE **
 ****************************************************
	use temp,clear
	set seed $seed
	xtset gwf_caseid year

* Generate DV and IV with uncertainty in original dataset
	gen latentmeansim = rnormal(latentmean,latentsd)
	egen repressionsim = std(latentmeansim)
	replace repressionsim = repressionsim*(-1)
	gen latentperssim = rnormal(pers_2pl,pers_se_2pl)
	qui sum latentperssim
	gen xperssim = (latentperssim+abs(r(min))) / (r(max) - r(min))

* Define a program for repreatedly generating DV and IV with uncertainty, and run regression model *
	capture program drop latentsim
	program define latentsim
		drop latentmeansim repressionsim latentperssim xperssim
		gen latentmeansim = rnormal(latentmean,latentsd)
		egen repressionsim = std(latentmeansim)
		replace repressionsim = repressionsim*(-1)
		gen latentperssim = rnormal(pers_2pl,pers_se_2pl)
		qui sum latentperssim
		gen xperssim = (latentperssim+abs(r(min))) / (r(max) - r(min))
		xtreg repressionsim coldwar xperssim civwar intwar nav_protestV mean5 loggdp lpopl lji, cluster(gwf_caseid)   
	end

* Simulate 1000 times and store coefficients *
	simulate _b _se, rep($m):latentsim

* Save result to a dta file *
	save simbeta.dta,replace

* Calculate sims means and (between & within) standard errors from sims stored as variable values *
	gen varname =""
	gen meanbeta=.
	gen varbeta=.
	local var ="_b_lji _b_mean5 _b_lpopl _b_loggdp _b_nav_protestV _b_intwar _b_civwar _b_coldwar _b_xperssim"
	local i =1
	gen n =_n
	foreach v of local var {
		replace varname = "`v'" if n==`i'
		qui sum `v'
		replace meanbeta = r(mean) if n==`i'
		replace varbeta = r(Var) if n==`i'
		local i = `i'+1
	}
	gen meanse=.
	local var ="_se_lji _se_mean5 _se_lpopl _se_loggdp _se_nav_protestV _se_intwar _se_civwar _se_coldwar _se_xperssim"
	local i =1
	foreach v of local var {
		qui sum `v'
		replace meanse = r(mean) if n==`i'
		local i = `i'+1
	}
	
* Calculate overall standard errors *
	qui count
	gen simse = sqrt(meanse^2+varbeta*(1+1/r(N)))

* Generate 95% CI *
	gen lo = meanbeta - 1.96*simse
	gen hi = meanbeta + 1.96*simse

* Plot coefficients with uncertainty *
	twoway (rspike lo hi n if n<=9,hori col(black)lwidth(medium)) (scatter n meanbeta if n<=9,col(black) msymbol(d) msize(medium) ///
		xtitle("Coefficient estimate", height(6)) ytitle("") legend(off) ysize(1) xsize(1.5) xline(0,lpat(dash)) mfcolor(white) ///
		title("Correlates of domestic repression", size(medium) ) ///
		ylab(1 `" "Judicial" "independence""' 2 "Protest " 3 "Population" 4 "GDP per capita" 5 "Violent protest"  ///
		6 "Int'l conflict" 7 "Civil conflict" 8 "Cold War" 9 "{bf:Personalism}") ylabel(, angle(0)))  ///
		(scatter n meanbeta if n<=9,col(red)msize(vsmall)msymbol(plus))
		graph export "repression-model-4.pdf", as(pdf)   replace



		
 ************** The End **************
 
 log close
